### Replication code for "Misdemeanor Disenfranchisement?"
### October 2018
### See the readme file for more details on what goes where in this replication package
### Contact Ariel White with questions: arwhi@mit.edu


# Stage 1: load the first-time-defendant file, keep the misdemeanor courts,
# export defendant names for the external Python name parser, then column-bind
# the parsed names back on and save the result as "defendants".
rm(list=ls())
library(data.table)
library(lubridate)
#setwd("/home/ariel/Dropbox (MIT)/Texas/Harris_fullsentencing")
load("harrisfirsttime200014_withfutureanddisps.Rdata") # provides "collapsedfirsttime"
harrisoff <- collapsedfirsttime; rm(collapsedfirsttime)

misdemeanors <- subset(harrisoff, harrisoff$crt<16); dim(misdemeanors) #keep only misdemeanor cases
summary(misdemeanors$totalsentencedays); summary(misdemeanors$jail); summary(misdemeanors$nonconv);
# NOTE(review): the row filter tests totalsentencedays, but the assignment
# targets a different column, sentencedays (0 on those rows, NA elsewhere);
# totalsentencedays keeps its NAs. This looks like a typo for
# `totalsentencedays := 0`. Left unchanged because correcting it would change
# values computed downstream from totalsentencedays -- confirm intent.
misdemeanors[is.na(totalsentencedays), sentencedays := 0]

# The parser works on a plain text file of names: write just the names.
defnames <- misdemeanors$def_nam
write.table(defnames, file= "Harrisco_defendantnames_indsubset.txt", row.names=FALSE, col.names=FALSE, sep=",")

# Run the parsing script and refuse to continue if it did not exit cleanly;
# otherwise a stale or partial "_parsed.txt" could be bound to the wrong rows.
parse_status <- system('python "parsedefendantnames.py"')
if (parse_status != 0) {
  stop("parsedefendantnames.py exited with status ", parse_status,
       "; not merging parsed names", call. = FALSE)
}

# Merge the parsed names back in. The bind below is purely positional (row i of
# the parser output is assumed to describe row i of misdemeanors), so the row
# counts must agree.
dnames <- read.table("Harrisco_defendantnames_indsubset_parsed.txt", header=TRUE, sep="|")
if (nrow(dnames) != nrow(misdemeanors)) {
  stop("parsed name file has ", nrow(dnames), " rows but there are ",
       nrow(misdemeanors), " defendants; cannot bind by position", call. = FALSE)
}
defendants <- data.table(misdemeanors, dnames)
head(defendants)

save(defendants, file="Harris0014_parseddefendants_withfuture.Rdata")

#####################################################################################
# now, try matching to the voter file   

rm(list = ls())
library(data.table)
library(lubridate)

setwd("/nfs/home/A/awhite/shared_space/FLvote/Texas/merging")
# Voter file. Despite the object name, this is the whole state file.
harris <- as.data.table(read.csv("export.csv", header = TRUE, stringsAsFactors = FALSE))
#setwd("/nfs/projects_nobackup/f/FLvote/Texas/merging")
load("Harris0014_parseddefendants_withfuture.Rdata") # loads "defendants"

# --- Defendant side: build the merge keys under the voter-file column names.
setnames(defendants, "def_yob", "voterYOB")
defendants[, `:=`(
  voterfname    = as.character(DefFirstName),
  voterlname    = as.character(DefLastName),
  voterfinitial = substr(as.character(DefFirstName), 1, 1)
)]
names(defendants)

defendants[, casedate := ymd(firstcasedate)]
# The two filters below restrict to the paper's time window. With them enabled,
# the summary calls further down match the paper; they are disabled here so the
# merged/deidentified dataset covers a longer timeframe for robustness checks.
#defendants<- defendants[casedate < "2012-11-06",]
#defendants<- defendants[casedate > "2008-11-04",]

# --- Voter side: every row in the voter file is a registrant.
harris[, registered := 1]
setnames(harris, "middle_name", "votermname")
harris[, `:=`(
  voterfname = toupper(first_name),
  voterlname = toupper(last_name)
)]
harris[, voterfinitial := substr(voterfname, 1, 1)]
harris[, voterDOB := ymd(born_at)]

defendants[, voterDOB := ymd(def_dob)]
defendants[, firstcasedate := NULL] # duplicated column causing problems

# Keep every defendant (all.y); a defendant may pick up several voter rows that
# share last name, first initial and date of birth.
voter1 <- merge(
  harris, defendants,
  by = c("voterlname", "voterfinitial", "voterDOB"),
  all.y = TRUE,
  allow.cartesian = TRUE
)
dim(voter1); dim(harris); dim(defendants)
uniqueN(voter1$def_spn)
voter1[, sum(registered, na.rm = TRUE) / .N]

# Trim duplicate voter matches using a first-name string distance, blank the
# voter-file data for poor matches, and build the registration/turnout outcomes.
library(stringdist) # library() fails loudly if the package is missing; require() only returns FALSE

# Jaro-Winkler distance between the voter-file first name (.x) and the
# defendant first name (.y). stringdist() is vectorised over both arguments, so
# no element-wise mapply() is needed. Runs 0-1, 0 is a perfect match.
voter1[, fnamematchdist := stringdist(voterfname.x, voterfname.y, method = "jw", p = 0)]
summary(voter1$fnamematchdist)
#most are perfect matches (due to birthdate)-- fuzzy first-name matching isn't doing that much work.

# Sort candidates within defendant by match quality and keep the first one.
# NOTE(review): setkey() orders NA before any number, so a candidate with an NA
# distance is kept ahead of scored candidates, and the trimming below never
# touches NA distances -- confirm this cannot occur for multi-match defendants.
setkey(voter1, def_spn, fnamematchdist)
voter1[, n := seq_len(.N), by = list(def_spn)]
voter1.1 <- voter1[n == 1]
dim(voter1.1); dim(voter1)
length(unique(voter1.1$def_spn)); (dim(voter1.1)) #no more dups.

# For poor first-name matches (distance > .2) drop the behavioural data that
# came from the voter file, but keep the defendant observation itself.
bad_rows <- voter1.1[, which(!is.na(fnamematchdist) & fnamematchdist > .2)]
voter1.1[bad_rows, registered := 0]
voter1.1[bad_rows, general_2010 := NA]
voter1.1[bad_rows, general_2008 := NA]
voter1.1[bad_rows, vh12g1 := NA]
voter1.1[bad_rows, vh08g1 := NA]
# vh10g1 is exported in the deidentified files later in this script but was not
# blanked with the other vote-history columns; blank it too when it is present.
if ("vh10g1" %in% names(voter1.1)) {
  voter1.1[bad_rows, vh10g1 := NA]
}
#see below for some code that plays around with this cutpoint and makes the plot for the SI.

voter1 <- voter1.1
rm(voter1.1)
head(voter1)
sum(voter1$registered, na.rm = TRUE)

#set up outcome vars for voting
voter1[, vote2012 := 0]
voter1[vh12g1 > 0 & !is.na(vh12g1), vote2012 := 1]
sum(voter1$vote2012, na.rm = TRUE)

# Defendants with no voter-file match have NA here; treat them as unregistered
# non-voters.
voter1[is.na(registered), registered := 0]
voter1[is.na(vote2012), vote2012 := 0]
sum(voter1$registered, na.rm = TRUE) / nrow(voter1)
sum(voter1$vote2012, na.rm = TRUE) / nrow(voter1)
sum(voter1$vote2012, na.rm = TRUE) / sum(voter1$registered, na.rm = TRUE)

#########################################
## Setup for analysis:
#########################################

# Covariate clean-up. male and black start as plain NA columns and are then
# filled in only where the source field is informative.
voter1[, male := NA]
voter1[def_sex == "M", male := 1]
voter1[def_sex == "F", male := 0]

voter1[, black := NA]
voter1[def_rac == "B", black := 1]
voter1[!is.na(def_rac) & def_rac != "B", black := 0]

# ageatfile is split at 10950 (= 30 * 365; presumably age in days -- confirm).
# Rows with NA ageatfile stay NA.
voter1[ageatfile >= 10950, over30 := 1]
voter1[ageatfile < 10950, over30 := 0]

# Sentence-length indicators: 1 when the sentence reaches the threshold,
# 0 otherwise (including when totalsentencedays is NA).
voter1[, sent1plus := as.numeric(!is.na(totalsentencedays) & totalsentencedays >= 365)]
summary(voter1$sent1plus)
voter1[, sent1mplus := as.numeric(!is.na(totalsentencedays) & totalsentencedays >= 30)]
summary(voter1$sent1mplus)
voter1[, sent1mavg := mean(sent1mplus, na.rm = TRUE), by = crt]
summary(voter1$sent1mavg)

# Shorter column names so that older analysis code keeps working.
setnames(voter1,
         old = c("anyjail", "anyfine", "anyprobation"),
         new = c("jail", "fine", "probation"))
voter1[, sentencedays := totalsentencedays]

# Instruments: courtroom-by-filing-year averages of each outcome.
voter1[, `:=`(
  crtconvrate1     = mean(anyconv, na.rm = TRUE),
  crtsentavg1      = mean(sentencedays, na.rm = TRUE),
  crtjailavg1      = mean(jail, na.rm = TRUE),
  sent1mavg1       = mean(sent1mplus, na.rm = TRUE),
  crtfineavg1      = mean(fine, na.rm = TRUE),
  crtprobationavg1 = mean(probation, na.rm = TRUE)
), by = list(crt, fyear)]

# One logical indicator column per courtroom, named crt_<courtroom>.
inds <- unique(voter1$crt) #15 courtrooms
voter1[, paste0("crt_", inds) := lapply(inds, function(court) crt == court)]

# Deidentify and save: this longer-window file feeds some SI analyses.
keep <- c(
  "fyear", "registered", "crt", "voterYOB", "def_sex", "def_rac",
  "ageatfile", "sentencedays", "jail", "fine", "probation", "anyconv",
  "mostsevcharge", "numcases", "felonies", "misdemeanors",
  "felonyconvictions", "jailsentences", "casedate", "vote2012", "male",
  "black", "over30", "sent1plus", "sent1mplus", "sent1mavg",
  "crtconvrate1", "crtsentavg1", "crtjailavg1", "sent1mavg1",
  "crtfineavg1", "crtprobationavg1", "disposition", "com_off_lit_1",
  "crt_9", "crt_1", "crt_13", "crt_10", "crt_2", "crt_11", "crt_14",
  "crt_7", "crt_12", "crt_15", "crt_3", "crt_8", "crt_4", "crt_6", "crt_5",
  "vh08g1", "vh10g1"
)

voter1long <- subset(voter1, select = keep)
dim(voter1long)

save(voter1long, file = "defendants_voter1_manyyears_deidentified.Rdata")

#####################################################################################
## Then, limit to analysis time period and do some additional merging for SI analyses. 

# Keep cases dated strictly between the two boundary dates (both exclusive);
# rows with a missing casedate are dropped.
voter1 <- voter1[casedate > "2008-11-04" & casedate < "2012-11-06"]

# Surname list used to flag Spanish surnames.
library(foreign)
names100 <- read.csv("app_c.csv", stringsAsFactors = FALSE)
head(names100); dim(names100)
# NOTE(review): this comparison is numeric only if pcthispanic was read as a
# numeric column; if the file holds non-numeric entries the column is character
# and ">" compares strings -- confirm the column type.
spanish100 <- names100[names100$pcthispanic > 90, ]
spsn <- data.table(voterlname = spanish100$name)
spsn[, spanishsurname := 1]

# Left join on last name: surnames not on the list get NA for spanishsurname.
voter1s <- merge(voter1, spsn, by = "voterlname", all.x = TRUE)
dim(voter1); dim(voter1s); sum(voter1s$spanishsurname, na.rm = TRUE)
voter1 <- voter1s

# Independent copy that the later sections build on.
voter1full <- copy(voter1)

#####################################################################################
# Add in homeownership data as well. 

# Homeownership flag: find assessor records whose owner-name field contains the
# defendant's first and last name and whose zip code, house number and
# (approximately) street name agree with the defendant's address.
load("harrisassessment2008_ownerspropertiesmerged.Rdata") #"ownacct" (home ownership data collected from assessor's office)
head(ownacct$name) #note that names don't seem reliably formatted.

# Restrict to defendants whose address zip code is in the Harris Co. list.
zips1 <- read.table("HarrisCoZipcodes.txt", quote="", sep="", fill=TRUE, stringsAsFactors=FALSE)
zips <- as.numeric(zips1$V1)
voter1h <- voter1full[voter1full$def_zip %in% zips]; dim(voter1); dim(voter1h)

library(stringdist) # used for the street-name comparison below

ptm <- proc.time()
# One slot per defendant; slots stay NULL when nothing matches.
List <- vector("list", nrow(voter1h))
for (i in seq_len(nrow(voter1h))) {
	# Names are matched as literal substrings (fixed = TRUE): a name containing a
	# regex metacharacter such as "." or "(" must not be interpreted as a pattern.
	# which() drops NA comparisons instead of producing all-NA rows.
	lastname_i <- as.character(voter1h$DefLastName[i])
	firstname_i <- as.character(voter1h$DefFirstName[i])
	match1 <- ownacct[which(grepl(lastname_i, ownacct$name, fixed = TRUE)), ] #is last name of def in the name field for owners?
	namesmatch <- match1[which(grepl(firstname_i, match1$name, fixed = TRUE)), ] #first name?
	allmatch <- namesmatch[which(as.numeric(namesmatch$site_addr_3) == voter1h$def_zip[i]), ] #zip code?
	if (nrow(allmatch) > 0) {
		allmatch$defendant <- voter1h$def_spn[i] #tag with the defendant id and keep it
		List[[i]] <- allmatch
	}
}
allmatches <- do.call(rbind, Filter(Negate(is.null), List))
proc.time()-ptm

dim(allmatches) #lots
length(unique(allmatches$defendant)) #lots of dupes for a few people.

#next, merge this to the main data and then trim out bad matches using additional data.
voter1h[, index := seq_len(.N)]
allmatches$ownersfile <- 1
voter1h[, sentencedays := NULL]
fullmerge <- merge(voter1h, allmatches, by.x="def_spn", by.y="defendant", all.x=TRUE)
#fullmerge <- merge(voter1h, allmatches[is.na(allmatches$acct)==F,], by.x="def_spn", by.y="defendant", all.x=T)
dim(fullmerge); dim(voter1h)
unmatched <- fullmerge[is.na(fullmerge$ownersfile),]; dim(unmatched) #most of them
matches <- fullmerge[fullmerge$ownersfile==1,]; dim(matches)

#one quick cut: see if the street numbers are even close.
matches$housenum <- as.numeric(gsub("([0-9]+).*$", "\\1", matches$site_addr_1)) #pull out house numbers from assessment data
matches$def_housenum <- as.numeric(trimws(matches$def_stnum))
matches$streetnum <- ifelse(matches$housenum == matches$def_housenum, 1, 0)
sum(matches$streetnum, na.rm=TRUE)

goodmatch <- matches[matches$streetnum==1,]

#additional filter: fuzzy match on street name, just to make sure they're roughly similar
goodmatch$ownstreet <- gsub("\\d", "", goodmatch$site_addr_1) # assessor address with digits stripped
# stringdist() is vectorised, so the two columns are compared pairwise directly.
goodmatch$stringdist_street <- stringdist(as.character(goodmatch$def_stnam), as.character(goodmatch$ownstreet), method="jw", p=0)
summary(goodmatch$stringdist_street)

bestmatch <- goodmatch[goodmatch$stringdist_street<.45,]
dim(bestmatch)
bestmatch[, homeowner := 1]
bestmatch <- unique(bestmatch, by="def_spn") # one row per defendant
unmatched <- voter1h[!(voter1h$def_spn %in% bestmatch$def_spn),]; dim(unmatched)
unmatched[, homeowner := 0]
homeownership <- data.table(rbind(bestmatch, unmatched, fill=TRUE)); dim(homeownership)

# Carry the flag back to the main dataset: 0 for every defendant considered
# here, 1 for those with an accepted match; defendants outside the zip list
# keep NA.
voter1full[def_spn %in% homeownership$def_spn, homeownership := 0]
voter1full[def_spn %in% bestmatch$def_spn, homeownership := 1]


#####################################################################################
#Now drop names/exact DOBs and output deidentified version for replication package
colnames(voter1full)

# Columns that go into the replication file: no names or exact dates of birth.
keep <- c(
  "fyear", "registered", "crt", "voterYOB", "def_sex", "def_rac", "def_zip",
  "ageatfile", "sentencedays", "jail", "fine", "probation", "anyconv",
  "mostsevcharge", "numcases", "felonies", "misdemeanors",
  "felonyconvictions", "jailsentences", "casedate", "vote2012", "male",
  "black", "over30", "sent1plus", "sent1mplus", "sent1mavg",
  "crtconvrate1", "crtsentavg1", "crtjailavg1", "sent1mavg1",
  "crtfineavg1", "crtprobationavg1", "disposition", "com_off_lit_1",
  "crt_9", "crt_1", "crt_13", "crt_10", "crt_2", "crt_11", "crt_14",
  "crt_7", "crt_12", "crt_15", "crt_3", "crt_8", "crt_4", "crt_6", "crt_5",
  "vh08g1", "vh10g1", "spanishsurname", "homeownership"
)

# voter1 is rebound to the trimmed copy; voter1full keeps every column.
voter1 <- subset(voter1full, select = keep)

save(voter1, file = "defendants_voter1_deidentified.Rdata") #this is main analysis dataset.


####################################################################################
# now generate a dataset with neighborhood characteristics (based on address)
# Work on an independent copy: plain `<-` only creates a second name for the
# same data.table, so the `sentencedays := NULL` below would otherwise also
# delete that column from voter1full.
voter1 <- copy(voter1full)
load("Harrisdefendants20002012_Arcgeocoded.Rdata") # provides "geodef12flat"
harrismatch <- geodef12flat

#trim down to Harris county addresses? using zipcode http://www.zillow.com/browse/homes/tx/harris-county/
Harriscozips <- c(63362, 77002, 77004, 77003, 77006, 77005, 77008, 77007, 77010, 77009, 77012, 77011, 75032, 77014, 77013, 77016, 63383, 77015, 77018, 77017, 77020, 77019, 77022, 77021, 77024, 77023, 77026, 77025, 77028, 77027, 77030, 77029, 77032, 77031, 77034, 77033, 77036, 77035, 77038, 77037, 77040, 77039, 77042, 77041, 77044, 77043, 77046, 77045, 77048, 77047, 77050, 77049, 77051, 77054, 77053, 77056, 77055, 77058, 28056, 77057, 77060, 77059, 77062, 77061, 77064, 77063, 77066, 77065, 77068, 77067, 77070, 77069, 76048, 77072, 77071, 77074, 77073, 77076, 77075, 77078, 77077, 77080, 77079, 77082, 77081, 77084, 77083, 77086, 77085, 77088, 77087, 77090, 75134, 77089, 77092, 77091, 77094, 78108, 77093, 77096, 77095, 77098, 77099, 77204, 63627, 75160, 77217, 77238, 77249, 77255, 75229, 77266, 77268, 76226, 77306, 77318, 77316, 77325, 76247, 77334, 78266, 77336, 77339, 77338, 77345, 76270, 77346, 77354, 33935, 77356, 77355, 77357, 77362, 77365, 77373, 77375, 77377, 77379, 77381, 77380, 77383, 77382, 77385, 80498, 77384, 77386, 77389, 77388, 77391, 77396, 77401, 76437, 77406, 77410, 77423, 77429, 77433, 77441, 76472, 77445, 77447, 77450, 77449, 77459, 76513, 77469, 77471, 77478, 77477, 77479, 77482, 77484, 77489, 77493, 77494, 77503, 77502, 77505, 77504, 36862, 77507, 77506, 77510, 77514, 77521, 77520, 77530, 77532, 77531, 77523, 77407, 77536, 77498, 77535, 77539, 79708, 77546, 77545, 77547, 77554, 77562, 77571, 77573, 77578, 77581, 77584, 77583, 77586, 77587, 77590, 77598, 77650, 77663, 75758, 78669, 79938, 77845, 78734)
dim(harrismatch)
harrismatch <- harrismatch[harrismatch$ARC_ZIP %in% Harriscozips, ]
harrismatch <- harrismatch[harrismatch$Score >48,] # keep geocodes scoring above 48

harrismatch <- harrismatch[harrismatch$crt<16,] # and for now keep only misdemeanants too.
dim(harrismatch)
# firstcased is parsed as a yyyymmdd date. The window here includes 2012-11-06
# itself, whereas the casedate filter applied to voter1 earlier in this file
# excludes it.
harrismatch$firstcase_date <- as.Date(as.character(harrismatch$firstcased), format = "%Y%m%d")
harrismatch <- harrismatch[harrismatch$firstcase_date > "2008-11-04" & harrismatch$firstcase_date <= "2012-11-06",]
#clean up/subset as needed.
colnames(harrismatch)[colnames(harrismatch) == "coords.x2"] <- "Latitude"
colnames(harrismatch)[colnames(harrismatch) == "coords.x1"] <- "Longitude"

#now try merging back based on def_spn.
voter1[, sentencedays := NULL] #duplicated col causing problems
geo <- merge(voter1, harrismatch, by="def_spn"); dim(geo); dim(voter1) #about as expected--around 2/3 geocoded.

#now put those addresses in census tracts and look at local characteristics.
library(rgdal)
currwd <- getwd()
# Read the tract layer by pointing readOGR at its directory instead of
# changing the working directory and changing it back: if the read fails, the
# session would otherwise be left inside the data folder and the relative paths
# used later in this script would stop resolving.
#need to update this-- go find this data again and put in RCE.
tract_dir <- file.path(currwd, "censustracts_withcensusdata2010")
Harristracts <- readOGR(dsn = tract_dir, layer = "Tract_2010Census_DP1")
summary(Harristracts); proj4string(Harristracts)

# Turn the geocoded defendants into spatial points (longitude/latitude, WGS84).
alldefsproj <- geo
coordinates(alldefsproj) <- c("Longitude", "Latitude")
proj4string(alldefsproj) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
##Assign voters to tracts
proj4string(alldefsproj)
# Reproject the points to the tract layer's CRS, then overlay them on the
# tract polygons to pick up tract attributes.
harrissampleT2 <- spTransform(alldefsproj, CRS(proj4string(Harristracts)))
join1d <- over(harrissampleT2, Harristracts)
names(join1d)

#need to join this to the original dataset.
# The tract attributes are bound on by position, next to the rows of geo.
dim(geo); dim(join1d)
alldefscensus <- cbind(geo, join1d)
dim(alldefscensus)

#okay, now get a little trickier: want to find or merge in local economic data and use this to split sample into more/less economically vulnerable.
dim(alldefscensus)
library(foreign)
acspov <- read.csv("./censustracts_withcensusdata2010/ACS_10_SF4_B17001_with_ann.csv", stringsAsFactors=FALSE)
#now merge this onto join1, using census tract vars.
names(acspov)
head(acspov$GEO.id); head(acspov$GEO.id2)
names(join1d)
head(join1d$GEOID10)
class(acspov$GEO.id2)
join1d$GEO.id2 <- as.character(join1d$GEOID10)

#set up some poverty measures neatly, then subset so I'm only adding them onto this monster df.
# (The former `head <- head(acspov)` was dropped: it bound an unused data frame
# to the name of the base function head().)
head(acspov)[1:10]
acspov <- acspov[-1,] #drop the row of labels.
acspov$population <- as.numeric(acspov$HD01_VD01)
acspov$poor <- as.numeric(acspov$HD01_VD02)
acspov$povrate <- acspov$poor/acspov$population
summary(acspov$povrate) #looks about right, 0-70%.

povframe <- subset(acspov, select=c("GEO.id2","population", "poor", "povrate"))
dim(acspov); dim(povframe)

# Tract ids are compared as character strings on both sides of the merge.
alldefscensus$GEO.id2 <- as.character(alldefscensus$GEOID10)
join2d <- merge(alldefscensus, povframe, by="GEO.id2"); dim(alldefscensus); dim(join2d); dim(povframe)

#now deidentify and save this too.
join2dfull <- copy(join2d)
keeppov <- c("fyear.x", "crt.x", "registered", "voterYOB", "jail", "fine", "probation",  "mostsevcharge", "casedate", "vote2012", "male", "black", "over30",  "crtjailavg1","sent1mavg1", "povrate")

join2d <- subset(join2d, select=keeppov)

save(join2d, file="defendants_join2d_deidentified.Rdata")

#####################################################################################
## now, benchmark the voter file for SI

#look at total 2014 registration, 2012 turnout, 2008 turnout
#put in benchmarks from here: http://www.sos.state.tx.us/elections/historical/70-92.shtml

# Official statewide totals to benchmark the voter file against.
real2014reg     <- 14025441
realmid2014reg  <- 13601324
real2012turnout <- 7993851
real2008turnout <- 8077795
real2004turnout <- 7410765

## 2014 registration: shortfall of the voter file against each official count.
real2014reg - nrow(harris)
realmid2014reg - nrow(harris)
#so looks about right, if they pulled it in the summer.

# Turnout indicators: 1 when the vote-history field for that general election
# is non-missing and greater than 0, otherwise 0.
for (yr in c("12", "08", "04")) {
  history <- harris[[paste0("vh", yr, "g1")]]
  harris[, (paste0("vote20", yr)) := as.numeric(!is.na(history) & history > 0)]
}
rm(history, yr)

## 2012 vote
harris[, sum(vote2012)]
real2012turnout - harris[, sum(vote2012)] #not all that many.
(real2012turnout - harris[, sum(vote2012)]) / real2012turnout

## 2008 vote
harris[, sum(vote2008)]
real2008turnout - harris[, sum(vote2008)] #many more
(real2008turnout - harris[, sum(vote2008)]) / real2008turnout

## 2004 vote
harris[, sum(vote2004)]
real2004turnout - harris[, sum(vote2004)] #now up to almost a quarter.
(real2004turnout - harris[, sum(vote2004)]) / real2004turnout

# Comparison table: official count, voter-file count, gap, and gap as a share
# of the official count.
sos <- c(real2012turnout, real2008turnout, real2004turnout)
nationb <- harris[, c(sum(vote2012), sum(vote2008), sum(vote2004))]
compare <- data.frame(sos = sos, nationb = nationb)
compare$diff <- compare$sos - compare$nationb
compare$pctdiff <- compare$diff / compare$sos

row.names(compare) <- c("2012", "2008", "2004")
names(compare) <- c("SOS", "Voter File", "Difference", "Pct. Diff")
library(stargazer)
stargazer(compare, out = "voterfilebenchmark.tex", label = "voterfilebenchmark", summary = FALSE) #Table A8 in SI

#####################################################################################
## now, quick check for the SI: what if cutpoints were different (compared to .2)? 
## run through various possible cutoffs.  
## for each, ck what the overall reg/voting rate is, as well as the jail effect ests (all/black)

# Sensitivity of the results to the first-name string-distance cutoff.
# For each cutoff: recode poor matches, recompute the registration and turnout
# rates, and re-estimate the jail effect (all defendants / black defendants).
#
# NOTE(review): voter1full descends from voter1 AFTER the main pipeline already
# kept one row per def_spn and applied the 0.2 cutoff (registered set to 0 and
# vh12g1 set to NA for distances above 0.2). Cutoffs above 0.2 therefore cannot
# bring those matches back here, and their results can only equal the 0.2 ones.
# A real sensitivity check above 0.2 needs an untrimmed copy of the merge.

# ivreg() is not attached anywhere else in this script.
if (!exists("ivreg", mode = "function")) library(AER)

cutpoints <- seq(.1, .3, by=.01)
# One results row per cutoff (the sequence has 21 values).
cutpointdrops <- as.data.frame(matrix(nrow=length(cutpoints), ncol=9))
colnames(cutpointdrops) <- c("cutpoint", "registration", "vote2012", "overallest","overalllowCI", "overallhighCI", "blackest", "blacklowCI", "blackhighCI")

for (i in seq_along(cutpoints)){
	cutpointdrops[i,1] <- cutpoint <- cutpoints[i]
	#then drop any matches that are v. bad, and also keep only the best one of duplicated matches.
	voter2 <- copy(voter1full)
	voter2[, n := seq_len(.N), by=list(def_spn)]
	voter2 <- voter2[n==1]
	voter2[fnamematchdist > cutpoint & !is.na(fnamematchdist), registered := 0] #drop out behavioral stuff for bad matches, but keep the defendant observations.
	voter2[fnamematchdist > cutpoint & !is.na(fnamematchdist), vh12g1 := NA]
	#set up outcome vars for voting
	voter2[, vote2012 := 0]
	voter2[vh12g1 > 0 & !is.na(vh12g1), vote2012 := 1]
	voter2[is.na(registered), registered := 0]
	voter2[is.na(vote2012), vote2012 := 0]
	cutpointdrops[i,2] <- mean(voter2$registered, na.rm=TRUE)
	cutpointdrops[i,3] <- mean(voter2$vote2012, na.rm=TRUE)
	## now run the main analyses & save.
	# anyjail was renamed to jail earlier in this script; only re-derive jail
	# when the data still carry the old column name.
	if ("anyjail" %in% names(voter2)) voter2[, jail := anyjail]
	voter2[, crtjailavg1 := mean(jail, na.rm=TRUE), by=list(crt, fyear)]
	viv1.1 <- ivreg(vote2012 ~ jail + fyear| crtjailavg1 + fyear, data=voter2) #main overall specification
	# jail is the second coefficient (after the intercept). Index the confidence
	# interval matrix by row and column: a single linear index such as [8] lands
	# on the jail upper bound only when the model has exactly six coefficients.
	ci_all <- confint(viv1.1)
	cutpointdrops[i,4] <- coef(viv1.1)[2]
	cutpointdrops[i,5] <- ci_all[2, 1]
	cutpointdrops[i,6] <- ci_all[2, 2]
	## and run separately by race
	black <- voter2[def_rac=="B",]
	black[, crtjailavg1u := mean(jail, na.rm=TRUE), by=list(crt, fyear)] # instrument recomputed within the subsample
	iv2b <- ivreg(vote2012 ~ jail +fyear | crtjailavg1u +fyear, data = black)
	ci_black <- confint(iv2b)
	cutpointdrops[i,7] <- coef(iv2b)[2]
	cutpointdrops[i,8] <- ci_black[2, 1]
	cutpointdrops[i,9] <- ci_black[2, 2]
	rm(iv2b, viv1.1, voter2)

}

# Figure A3 in the SI: point estimate and confidence interval at each cutoff,
# with the 11th cutoff in the grid highlighted in red.
pdf("SI_mergecutpoints_black.pdf")
with(cutpointdrops, {
  plot(
    cutpoint, blackest,
    ylim = c(-.25, .05),
    main = "Estimated Effects of Jail on Voting for Black Defendants, \n Sensitivity to Merge Cutpoints",
    xlab = "String Distance Cutoff for Match Quality",
    ylab = "Estimated Effect on 2012 Turnout"
  )
  abline(h = 0, col = "lightgray", lty = 2, lwd = 2)
  segments(cutpoint, blacklowCI, cutpoint, blackhighCI)
  points(cutpoint[11], blackest[11], col = "red", pch = 20)
})
dev.off()

pdf("SI_mergecutpoints_all.pdf")
with(cutpointdrops, {
  plot(
    cutpoint, overallest,
    ylim = c(-.13, .05),
    main = "Estimated Effects of Jail on Voting for All Defendants, \n Sensitivity to Merge Cutpoints",
    xlab = "String Distance Cutoff for Match Quality",
    ylab = "Estimated Effect on 2012 Turnout"
  )
  abline(h = 0, col = "lightgray", lty = 2, lwd = 2)
  segments(cutpoint, overalllowCI, cutpoint, overallhighCI)
  points(cutpoint[11], overallest[11], col = "red", pch = 20)
})
dev.off()
